# Load inputs for the striatal network analysis.
# Run this script from the directory that contains the input CSV files; the
# former `setwd("")` call always failed with "cannot change working directory".
library(igraph)

# Upper-case every text column of a data frame and return it as a data frame.
# Unlike `sapply(df, toupper)`, this keeps `$column` access working and leaves
# non-text columns (for example a numeric edge weight) untouched.
uppercase_text_columns <- function(df) {
  is_text <- vapply(
    df,
    function(column) is.character(column) || is.factor(column),
    logical(1)
  )
  df[is_text] <- lapply(
    df[is_text],
    function(column) toupper(as.character(column))
  )
  df
}

# Edge table; the filter below uses its `name` and `name.1` columns.
network <- uppercase_text_columns(read.csv("NetworkEdges.csv"))

# Reference gene list; the filter below uses its `Genes` column.
striatal_genes <- uppercase_text_columns(read.csv("striatal_genes.csv"))

# Filter out parts of the network whose endpoints are not both in the
# original striatal genes list.
both_striatal <- network$name %in% striatal_genes$Genes &
  network$name.1 %in% striatal_genes$Genes
keep_network <- network[both_striatal, , drop = FALSE]

# One gene list per column. Blank cells are read as NA (same convention as
# the later read of "network_gene_members.csv") so that they are not counted
# as genes when list sizes are computed.
network_list <- read.csv(
  "1000_striatal_gene_lists.csv",
  na.strings = c("", "NA")
)
network_list <- uppercase_text_columns(network_list)

# Network statistics for the gene lists in `network_list` (stored with an
# "A" prefix, e.g. `Aedge_lengths`) and for the networks in
# "network_gene_members.csv" (stored without a prefix, e.g. `edge_lengths`).
# Relies on `keep_network` (edge table with `name` / `name.1` columns) and
# `network_list` (one gene list per column) defined earlier in the script.

# Names of the statistics computed for every network, in result order.
stat_names <- c(
  "edge_lengths", "num_nodes", "num_singletons", "average_degrees",
  "avg_betweenness",
  "num_clusters_wt", "modularity_wt", "avg_wt_size",
  "num_clusters_im", "modularity_im", "avg_im_size"
)

# Summarise a community-detection result: the number of clusters with at
# least `min_size` members, the modularity reported for the result, and the
# mean size of those clusters (NaN when no cluster reaches `min_size`).
summarise_clusters <- function(communities, min_size = 4) {
  big_clusters <- Filter(
    function(members) length(members) >= min_size,
    groups(communities)
  )
  c(
    count = length(big_clusters),
    modularity = modularity(communities),
    avg_size = sum(lengths(big_clusters)) / length(big_clusters)
  )
}

# Compute all statistics in `stat_names` for the sub-network of `edge_table`
# whose two endpoints (`name`, `name.1`) are both members of `genes`.
# Returns a numeric vector named by `stat_names`.
network_stats <- function(edge_table, genes) {
  # Missing and blank cells are padding, not genes.
  genes <- as.character(genes)
  genes <- genes[!is.na(genes) & nzchar(genes)]

  in_list <- edge_table$name %in% genes & edge_table$name.1 %in% genes
  built_network <- edge_table[in_list, , drop = FALSE]
  n_edges <- nrow(built_network)

  stats <- setNames(rep(NA_real_, length(stat_names)), stat_names)
  stats[["edge_lengths"]] <- n_edges

  if (n_edges == 0) {
    # No edges means no graph to cluster: report counts and leave the
    # graph-derived statistics as NA instead of calling igraph on an
    # empty graph.
    stats[["num_nodes"]] <- 0
    stats[["num_singletons"]] <- length(genes)
    stats[["num_clusters_wt"]] <- 0
    stats[["num_clusters_im"]] <- 0
    return(stats)
  }

  graph <- graph_from_data_frame(built_network, directed = FALSE,
                                 vertices = NULL)

  # Nodes are the distinct genes seen at either end of an edge. The columns
  # must be combined with c(): the second argument of unique() is
  # `incomparables`, not more data.
  n_nodes <- length(unique(c(
    as.character(built_network$name),
    as.character(built_network$name.1)
  )))
  stats[["num_nodes"]] <- n_nodes
  # Genes of the list that have no edge inside the sub-network.
  stats[["num_singletons"]] <- length(genes) - n_nodes
  # Each edge contributes to the degree of two nodes.
  stats[["average_degrees"]] <- (2 * n_edges) / n_nodes
  stats[["avg_betweenness"]] <- mean(betweenness(graph))

  # Cluster graph using walktrap.
  wtc <- cluster_walktrap(graph, weights = E(graph)$weight, steps = 4,
                          merges = TRUE, modularity = TRUE,
                          membership = TRUE)
  wt <- summarise_clusters(wtc)
  stats[["num_clusters_wt"]] <- wt[["count"]]
  stats[["modularity_wt"]] <- wt[["modularity"]]
  stats[["avg_wt_size"]] <- wt[["avg_size"]]

  # Cluster with infomap.
  imc <- cluster_infomap(graph, e.weights = NULL, v.weights = NULL,
                         nb.trials = 10, modularity = TRUE)
  im <- summarise_clusters(imc)
  stats[["num_clusters_im"]] <- im[["count"]]
  stats[["modularity_im"]] <- im[["modularity"]]
  stats[["avg_im_size"]] <- im[["avg_size"]]

  stats
}

# Run network_stats() on the given columns of `gene_lists`. Returns a matrix
# with one row per statistic (row names = `stat_names`) and one column per
# gene list.
collect_network_stats <- function(edge_table, gene_lists, columns) {
  template <- setNames(numeric(length(stat_names)), stat_names)
  vapply(
    columns,
    function(i) network_stats(edge_table, gene_lists[, i])[stat_names],
    template
  )
}

# Null distribution: every gene list in `network_list`.
# Stored as Aedge_lengths, Anum_nodes, ..., Aavg_betweenness so that every
# observed statistic below has a matching "A"-prefixed distribution.
null_stats <- collect_network_stats(
  keep_network, network_list, seq_len(ncol(network_list))
)
for (stat in stat_names) {
  assign(paste0("A", stat), null_stats[stat, ])
}

# Actual networks: one gene list per column; blank cells are read as NA.
network_actual <- read.csv("network_gene_members.csv",
                           na.strings = c("", "NA"))
# Upper-case in place so the object stays a data frame even with one row.
network_actual[] <- lapply(network_actual, toupper)
num_genes <- colSums(!is.na(network_actual))

# Number of leading columns of `network_actual` that are analysed (the
# original analysis used the first four); clamped so that a file with fewer
# columns does not fail with an out-of-bounds subscript.
n_actual_networks <- min(4L, ncol(network_actual))
actual_stats <- collect_network_stats(
  keep_network, network_actual, seq_len(n_actual_networks)
)
# Stored as edge_lengths, num_nodes, ..., avg_betweenness.
for (stat in stat_names) {
  assign(stat, actual_stats[stat, ])
}


# Statistics to test. For each name the workspace must hold the observed
# values per actual network (`<name>`) and the null distribution from the
# random gene lists (`A<name>`).
list2 <- c("edge_lengths", "num_nodes", "num_singletons", "average_degrees",
           "num_clusters_wt", "avg_wt_size", "modularity_wt",
           "num_clusters_im", "avg_im_size", "modularity_im",
           "avg_betweenness")

# For every statistic and every actual network, build one string
# "<statistic> <network column name> <one-sided p-value>", where the p-value
# comes from the z-score of the observed value against the null distribution.
siglist <- c()
for (l in list2) {
  a <- paste0("A", l)
  if (!exists(a) || !exists(l)) {
    # Without both vectors there is nothing to compare; skip this statistic
    # instead of aborting the whole run inside get().
    warning("Skipping '", l, "': '", l, "' or '", a, "' is not defined",
            call. = FALSE)
    next
  }

  # Drop NA/NaN entries (e.g. average cluster size of a network with no
  # qualifying cluster) so one undefined value does not turn every p-value
  # into NA.
  null_values <- get(a)
  null_values <- null_values[!is.na(null_values)]
  n_null <- length(null_values)

  pop_mean <- mean(null_values)
  # sd() is the sample SD; rescale it to the population SD.
  pop_sd <- sd(null_values) * sqrt((n_null - 1) / n_null)

  # Observed values, one per actual network, in column order.
  items <- get(l)
  z <- (items - pop_mean) / pop_sd
  pvalue1sided <- pnorm(-abs(z))

  name <- colnames(network_actual)[seq_along(items)]
  siglist <- c(siglist, paste(l, name, pvalue1sided))
}
siglist